Check working directory

getwd()
## [1] "/Users/alexg/R files/hair_cortisol/skew-normal FINAL"

Load packages

library(readxl)
library(psych)
library(dlookr)
## Registered S3 methods overwritten by 'dlookr':
##   method          from  
##   plot.transform  scales
##   print.transform scales
## 
## Attaching package: 'dlookr'
## The following object is masked from 'package:psych':
## 
##     describe
## The following object is masked from 'package:base':
## 
##     transform
library(vtable)
## Loading required package: kableExtra
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
## 
##     group_rows
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.22.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## The following object is masked from 'package:psych':
## 
##     cs
## The following object is masked from 'package:stats':
## 
##     ar
library(rethinking)
## Loading required package: cmdstanr
## This is cmdstanr version 0.8.0
## - CmdStanR documentation and vignettes: mc-stan.org/cmdstanr
## - CmdStan path: /Users/alexg/.cmdstan/cmdstan-2.36.0
## - CmdStan version: 2.36.0
## Loading required package: posterior
## This is posterior version 1.6.1
## 
## Attaching package: 'posterior'
## The following object is masked from 'package:dlookr':
## 
##     entropy
## The following objects are masked from 'package:stats':
## 
##     mad, sd, var
## The following objects are masked from 'package:base':
## 
##     %in%, match
## Loading required package: parallel
## rethinking (Version 2.42)
## 
## Attaching package: 'rethinking'
## The following objects are masked from 'package:brms':
## 
##     LOO, stancode, WAIC
## The following objects are masked from 'package:psych':
## 
##     logistic, logit, sim
## The following object is masked from 'package:stats':
## 
##     rstudent
library(loo)
## This is loo version 2.8.0
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
## 
## Attaching package: 'loo'
## The following object is masked from 'package:rethinking':
## 
##     compare
library(priorsense)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()      masks psych::%+%()
## ✖ ggplot2::alpha()    masks psych::alpha()
## ✖ tidyr::expand()     masks reshape::expand()
## ✖ tidyr::extract()    masks dlookr::extract()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ purrr::map()        masks rethinking::map()
## ✖ reshape::rename()   masks dplyr::rename()
## ✖ lubridate::stamp()  masks reshape::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vioplot)
## Loading required package: sm
## Package 'sm', version 2.2-6.0: type help(sm) for summary information
## 
## Attaching package: 'sm'
## 
## The following object is masked from 'package:dlookr':
## 
##     binning
## 
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(bayesplot)
## This is bayesplot version 1.12.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
## 
## Attaching package: 'bayesplot'
## 
## The following object is masked from 'package:posterior':
## 
##     rhat
## 
## The following object is masked from 'package:brms':
## 
##     rhat
library(bayestestR)

Load data

# Import the hair-cortisol workbook. col_types has one entry per spreadsheet
# column; "skip" entries are dropped on import. The result of read_xlsx() is
# converted to a base data.frame in the same step.
df <- as.data.frame(
  read_xlsx(
    "hair_cort_dog_all.xlsx",
    col_types = c(
      rep("text", 7), "numeric", "text", "skip",
      "numeric", "skip", "skip",
      "numeric", "skip"
    )
  )
)

INITIAL DATA PLOTTING AND EXPLORATION

Check characteristics of df

dim(df) # will tell you how many rows and columns the dataset has
## [1] 73 11
class(df) # will tell you what data structure has the dataset been assigned
## [1] "data.frame"

Explore the dataset to understand its structure.

head(df)
##   number   group visit season breed_group coat_colour    sex age comorbidity
## 1     c1 stopped    v0 winter         ret        dark   Male  43         yes
## 2     c2 stopped    v0 autumn         mix        dark   Male 105         yes
## 3     c3 stopped    v0 spring        ckcs         mix Female 117         yes
## 4     c4 stopped    v0 summer         ret        dark Female 108         yes
## 5     c5 stopped    v0 summer         ret        dark Female 110         yes
## 6     c6 stopped    v0 winter         mix       light Female 120         yes
##   fat_percent cortisol
## 1    52.21393 4.924220
## 2    38.52059 7.304202
## 3    46.94916 1.590000
## 4    44.46813 0.861570
## 5    39.59363 6.217317
## 6          NA 4.426785

1. Get summary stats for numeric data

# Keep only the numeric columns of df.
numeric_df <- Filter(is.numeric, df)
# dlookr is attached after psych, so an unqualified describe() resolves to
# dlookr::describe() (hence the tibble output), not psych::describe().
# Qualify the call so the result does not depend on package load order.
dlookr::describe(numeric_df)
## # A tibble: 3 × 26
##   described_variables     n    na  mean    sd se_mean   IQR skewness kurtosis
##   <chr>               <int> <int> <dbl> <dbl>   <dbl> <dbl>    <dbl>    <dbl>
## 1 age                    73     0 95.8  35.6     4.16 44      -0.104 -0.00589
## 2 fat_percent            55    18 40.5   7.82    1.05  7.82   -0.294  1.12   
## 3 cortisol               73     0  8.11 16.5     1.93  5.43    4.05  18.7    
## # ℹ 17 more variables: p00 <dbl>, p01 <dbl>, p05 <dbl>, p10 <dbl>, p20 <dbl>,
## #   p25 <dbl>, p30 <dbl>, p40 <dbl>, p50 <dbl>, p60 <dbl>, p70 <dbl>,
## #   p75 <dbl>, p80 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, p100 <dbl>

2. Check normality of all numeric variables

a. graphical assessment

plot_normality(numeric_df)

b. shapiro-wilk test

# Run a Shapiro-Wilk test on every numeric column. lapply() iterates over the
# data frame's columns directly, whereas apply() first coerces the whole data
# frame to a matrix. The result is still a list named by column.
lapply(numeric_df, shapiro.test)
## $age
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.97361, p-value = 0.1288
## 
## 
## $fat_percent
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.97956, p-value = 0.4692
## 
## 
## $cortisol
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.46269, p-value = 6.756e-15

c. repeat Q-Q plots with transformed data

i. log(cortisol)

# Normal Q-Q plot of raw cortisol with a red reference line
qqnorm(df$cortisol)
qqline(df$cortisol, col = "red")

# Same plot after a natural-log transform, for comparison with the raw scale
qqnorm(log(df$cortisol))
qqline(log(df$cortisol), col = "red")

ii. Shapiro test for log cortisol

shapiro.test(log(df$cortisol))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(df$cortisol)
## W = 0.94725, p-value = 0.004126

3. Check data numerically

summary(df$cortisol)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   0.4141   1.4119   2.3331   8.1089   6.8455 104.6172
summary(log(df$cortisol))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8817  0.3449  0.8472  1.1816  1.9236  4.6503

a. log-transform cortisol

df$lgCort <- log(df$cortisol)
summary(df$lgCort)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8817  0.3449  0.8472  1.1816  1.9236  4.6503

i. visualise

hist(df$lgCort)

b. Create simple category name for breed and convert to factor

# Build the breed factor straight from breed_group, with "mix" as the first
# level, then preview the first few values.
df$breed <- factor(
  df$breed_group,
  levels = c("mix", "ckcs", "pug", "ret", "other")
)
head(df$breed)
## [1] ret  mix  ckcs ret  ret  mix 
## Levels: mix ckcs pug ret other

c. Reorder season so spring is the reference level and the remaining seasons follow in calendar order

df$season <- factor(df$season, levels = c("spring", "summer", "autumn", "winter"))
head(df$season)
## [1] winter autumn spring summer summer winter
## Levels: spring summer autumn winter

4. Generate data summary

sumtable(df)
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 75 Max
group 73
… completed 42 58%
… stopped 31 42%
visit 73
… v0 52 71%
… v1 21 29%
season 73
… spring 14 19%
… summer 22 30%
… autumn 21 29%
… winter 16 22%
breed_group 73
… ckcs 7 10%
… mix 16 22%
… other 26 36%
… pug 7 10%
… ret 17 23%
coat_colour 73
… dark 30 41%
… light 28 38%
… mix 15 21%
sex 73
… Female 43 59%
… Male 30 41%
age 73 96 36 16 73 117 182
comorbidity 73
… no 15 21%
… yes 58 79%
fat_percent 55 40 7.8 18 37 45 61
cortisol 73 8.1 16 0.41 1.4 6.8 105
lgCort 73 1.2 1.2 -0.88 0.34 1.9 4.7
breed 73
… mix 16 22%
… ckcs 7 10%
… pug 7 10%
… ret 17 23%
… other 26 36%

5. Visualise associations

a. between Cortisol and season with a violin plot (vioplot package)

par(mfrow = c(1,1))
vioplot(cortisol ~ season, col = "firebrick",
        data = df)

b. between lgCortisol and season with a violin plot (vioplot package)

par(mfrow = c(1,1))
vioplot(lgCort ~ season, col = "firebrick",
        data = df)

c. between lgCortisol and season with stripchart

stripchart(lgCort ~ season, vertical = TRUE, method = "jitter",
           col = "steelblue3", data = df, pch = 20)

STANDARDISE DATA FOR MODELLING

1. Standardise cortisol

# Standardise log cortisol. Use the full column name: `df$lgC` only resolved
# to `lgCort` through `$` partial matching on data frames, which returns NULL
# as soon as a second column starting with "lgC" exists.
df$slgCort <- standardize(df$lgCort)
summary(df$slgCort)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.7079 -0.6925 -0.2768  0.0000  0.6142  2.8713

a. visualise standardised lgCort

hist(df$slgCort, breaks =20, col = "steelblue3", main = "Histogram of log hair cortisol", xlab = "Log hair cortisol (standardised)", xlim = c(-2, 3))

2. create dataset only containing complete data

df2 <- na.omit(df)

MODEL FOR THE EFFECT OF SEASON ON HAIR CORTISOL

1. Model code

# Model structure: skew-normal likelihood for standardised log cortisol, with
# season as a population-level effect and a group-level intercept for visit.
# NOTE(review): no priors are passed here, so running this line fits the model
# with brms defaults; `model` is reassigned by the prior-informed fit in
# step 5.
model <- brm(slgCort ~ season + (1 | visit), family = skew_normal(), data = df)

2. Check what priors need to be set

default_prior(slgCort ~ season +  (1 | visit),
                   family = skew_normal(),
                   data = df)
##                    prior     class         coef group resp dpar nlpar lb ub
##             normal(0, 4)     alpha                                         
##                   (flat)         b                                         
##                   (flat)         b seasonautumn                            
##                   (flat)         b seasonsummer                            
##                   (flat)         b seasonwinter                            
##  student_t(3, -0.3, 2.5) Intercept                                         
##     student_t(3, 0, 2.5)        sd                                     0   
##     student_t(3, 0, 2.5)        sd              visit                  0   
##     student_t(3, 0, 2.5)        sd    Intercept visit                  0   
##     student_t(3, 0, 2.5)     sigma                                     0   
##        source
##       default
##       default
##  (vectorized)
##  (vectorized)
##  (vectorized)
##       default
##       default
##  (vectorized)
##  (vectorized)
##       default

Published information about associations with hair cortisol

There is little published evidence of an effect of season on hair cortisol. In one study (Roth et al., 2016), hair cortisol was higher in January than in May and September. However, it was not clear how these sampling points relate to other times of year (e.g., the summer months, other winter months, spring and autumn), and the reported effect was subtle. Therefore, we elected to set a neutral regularising prior, allowing the model to learn from the data. It might be safer just to use a regularising prior; however, an alternative prior with a slight winter effect could also be tried.

Roth LS, Faresjö Å, Theodorsson E, Jensen P. Hair cortisol varies with season and lifestyle and relates to human interactions in German shepherd dogs. Sci Rep. 2016 Jan 21;6:19631. doi: 10.1038/srep19631

3. Set priors

# Set individual priors (the outcome slgCort is standardised, so all priors
# are on the standardised scale)
# Intercept: normal centred on 0 with SD 0.5
prior_int <- set_prior("normal(0, 0.5)", class = "Intercept")
# Residual scale of the skew-normal likelihood
prior_sig <- set_prior("exponential(1)", class = "sigma")
# Shared prior for all season coefficients (class "b")
prior_b <- set_prior("normal(0, 1)", class = "b")
# Group-level SD for the visit intercept
prior_sd <- set_prior("normal(0, 1)", class = "sd")
# Skewness parameter: centred on 4, replacing the brms default normal(0, 4)
prior_alpha <- set_prior("normal(4, 2)", class = "alpha")

# Combine the individual priors into the single object passed to brm(prior = )
priors <- c(prior_int, prior_sig, prior_b, prior_sd, prior_alpha)

4. Plot prior

a. Prior for intercept

x <- seq(-3, 3, length.out = 100)
y <- dnorm(x, mean = 0, sd = 0.5)
plot(y ~ x, type = "l")

b. Prior for sigma

x <- seq(0, 3, length.out = 100)
y <- dexp(x, rate = 1)
plot(y ~ x, type = "l")

5. Run model

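Increased adapt_delta above the default of 0.8 (0.999 here), as the model had divergent transitions; stepsize and max_treedepth are also set explicitly in the call below.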

# Seed R's RNG before fitting; intended to make the fit repeatable.
set.seed(666)
# Final model: same formula and family as the specification above, now with
# the user-defined priors.
model <- brm(slgCort ~ season + (1 | visit),
                   family = skew_normal(),
                   prior = priors,
                   data = df,
                   # Sampler settings tightened because of divergent transitions
                   control=list(adapt_delta=0.999, stepsize = 0.001, max_treedepth =15),
                   # 8000 iterations per chain, of which the first 2000 are warmup
                   iter = 8000, warmup = 2000,
                   cores = 4,
                   # NOTE(review): presumably kept for loo(moment_match = TRUE) later on; confirm
                   save_pars = save_pars(all =TRUE),
                   # Also sample from the priors so prior_draws(model) works later
                   sample_prior = TRUE)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'

6. Get summary of model

summary(model)
##  Family: skew_normal 
##   Links: mu = identity; sigma = identity; alpha = identity 
## Formula: slgCort ~ season + (1 | visit) 
##    Data: df (Number of observations: 73) 
##   Draws: 4 chains, each with iter = 8000; warmup = 2000; thin = 1;
##          total post-warmup draws = 24000
## 
## Multilevel Hyperparameters:
## ~visit (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.36      0.36     0.01     1.35 1.00     7211     9113
## 
## Regression Coefficients:
##              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        0.08      0.31    -0.56     0.70 1.00     9968    11735
## seasonsummer    -0.18      0.26    -0.67     0.33 1.00    12907    12966
## seasonautumn    -0.32      0.27    -0.84     0.22 1.00    12768    13794
## seasonwinter     0.26      0.28    -0.29     0.82 1.00    12885    13103
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.99      0.09     0.83     1.19 1.00    15716    14164
## alpha     4.75      1.46     2.28     7.94 1.00    14979    14148
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

7. MCMC diagnostics

plot(model)

Looking for hairy caterpillars

b. try a trank plot as well

mcmc_plot(model, type = 'rank_overlay')

8. Calculate 97% HPDI for season

Usually better than the compatibility intervals given in the summary.

a. summer

# Posterior draws as a matrix (one column per parameter).
draws <- as.matrix(model)
# 97% HPDI for the summer coefficient. Select the column by name rather than
# by position: column 2 is b_seasonsummer, not "age" as the earlier comment
# stated.
HPDI(draws[, "b_seasonsummer"], 0.97)
##      |0.97      0.97| 
## -0.7345002  0.3749654

b. autumn

# Posterior draws as a matrix (one column per parameter).
draws <- as.matrix(model)
# 97% HPDI for the autumn coefficient, selected by name instead of by
# column position (3).
HPDI(draws[, "b_seasonautumn"], 0.97)
##      |0.97      0.97| 
## -0.8887980  0.2880951

c. winter

# Posterior draws as a matrix (one column per parameter).
draws <- as.matrix(model)
# 97% HPDI for the winter coefficient, selected by name instead of by
# column position (4).
HPDI(draws[, "b_seasonwinter"], 0.97)
##      |0.97      0.97| 
## -0.3419357  0.8846329

9. Calculate R2 for model

bayes_R2(model, probs = c(0.015, 0.5, 0.985)) # 0.015, 0.5, 0.985 are the quantiles
##      Estimate  Est.Error       Q1.5        Q50     Q98.5
## R2 0.07947026 0.04182219 0.01072127 0.07424133 0.1849599
loo_R2(model, probs = c(0.015, 0.5, 0.985)) # 0.015, 0.5, 0.985 are the quantiles
##       Estimate  Est.Error       Q1.5         Q50    Q98.5
## R2 -0.02536903 0.05524721 -0.1526872 -0.02368764 0.088321

CHECKS ON MODEL

1. Basic check of simulations based on posterior distribution, versus the real data distribution

checks whether actual data is similar to simulated data.

pp_check(model, ndraws = 100) 

2. Check some individual draws versus observed using pp_check

par(mfrow = c(1,1))
pp_check(model, type = "hist", ndraws = 11, binwidth = 0.25) # separate histograms of 11 MCMC draws vs actual data

3. Other pp_check graphs

pp_check(model, type = "error_hist", ndraws = 11) # separate histograms of errors for 11 draws
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

pp_check(model, type = "scatter_avg", ndraws = 100) # scatter plot

pp_check(model, type = "stat_2d") #  scatterplot of joint posteriors
## Using all posterior draws for ppc type 'stat_2d' by default.
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.

# PPC functions for predictive checks based on (approximate) leave-one-out (LOO) cross-validation
pp_check(model, type = "loo_pit_overlay", ndraws = 1000) 
## NOTE: The kernel density estimate assumes continuous observations and is not optimal for discrete observations.

5. Pairs plot

pairs(model)

PSIS LOO-CV to check model performance

loo_model <- loo(model, moment_match = TRUE)
loo_model
## 
## Computed from 24000 by 73 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo   -100.7  6.1
## p_loo         5.3  1.1
## looic       201.4 12.1
## ------
## MCSE of elpd_loo is 0.0.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.5, 1.1]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.

AUTOMATED PRIOR SENSITIVITY USING THE PRIOR SENSE PACKAGE

1. Sensitivity check

First, check how sensitive the posterior is to power-scaling of the prior and of the likelihood, by comparing the base posterior with the posteriors resulting from power-scaling.

powerscale_sensitivity(model, variable = c("b_Intercept", "sigma", "b_seasonsummer", "b_seasonautumn", "b_seasonwinter"), facet_rows = "variable")
## Sensitivity based on cjs_dist
## Prior selection: all priors
## Likelihood selection: all data
## 
##        variable prior likelihood diagnosis
##     b_Intercept 0.037      0.035         -
##           sigma 0.035      0.146         -
##  b_seasonsummer 0.009      0.091         -
##  b_seasonautumn 0.020      0.091         -
##  b_seasonwinter 0.011      0.075         -

2. Kernel density

powerscale_plot_dens(model, variable = c("b_Intercept", "sigma", "b_seasonsummer", "b_seasonautumn", "b_seasonwinter"), facet_rows = "variable")

3. Empirical cumulative distribution functions

powerscale_plot_ecdf(model, variable = c("b_Intercept", "sigma", "b_seasonsummer", "b_seasonautumn", "b_seasonwinter"), facet_rows = "variable")

4. Quantities

# Plot posterior quantities under power-scaling for the same parameters as the
# density and ECDF plots. The argument was misspelled `vvariable`, so the
# selection was not passed as `variable`.
# NOTE(review): `facet_rows` is kept from the original call; confirm that
# powerscale_plot_quantities() actually uses it.
powerscale_plot_quantities(model, variable = c("b_Intercept", "sigma", "b_seasonsummer", "b_seasonautumn", "b_seasonwinter"), facet_rows = "variable")

5. Check mean and sd of the model data to see if the issue can be identified

mean(model$data$slgCort)
## [1] -1.76419e-16
sd(model$data$slgCort)
## [1] 1

These values appear similar to what was set for the priors, so seems OK?

6. Now use bayestestR package to check priors are informative

check_prior(model, effects = "all")
##             Parameter Prior_Quality
## 1         b_Intercept   informative
## 2      b_seasonsummer   informative
## 3      b_seasonautumn   informative
## 4      b_seasonwinter   informative
## 5 sd_visit__Intercept   informative

CHECK PRIOR PREDICTION LINES FROM FINAL MODEL

1. Obtain draws of priors from final model

prior <- prior_draws(model)
prior %>% glimpse()
## Rows: 24,000
## Columns: 5
## $ Intercept <dbl> -0.16873777, -0.76254257, 0.72117808, -0.60843457, 0.3585218…
## $ b         <dbl> 0.38628806, 2.23779242, 0.70207565, -0.70573365, 1.43807172,…
## $ sigma     <dbl> 1.22757771, 0.11401542, 1.13981912, 1.96439820, 0.90385733, …
## $ alpha     <dbl> 1.9628879, -0.2067883, 1.2664916, 5.0834548, 7.6534384, 4.95…
## $ sd_visit  <dbl> 0.11822937, 0.59715140, 0.30821303, 0.15514865, 1.11922846, …

2. Plot prior prediction lines for season with line plot

# Fix the RNG so the same 50 prior draws are sampled each time.
set.seed(5)

# For each sampled prior draw, compute the implied outcome at the reference
# season (0) and at one other season (1), then join the two points with a line.
prior %>%
  slice_sample(n = 50) %>%
  rownames_to_column(var = "draw") %>%
  expand_grid(season_ind = c(0, 1)) %>%
  mutate(pred = Intercept + b * season_ind) %>%
  ggplot(mapping = aes(x = season_ind, y = pred)) +
  geom_line(mapping = aes(group = draw), color = "firebrick", alpha = .4) +
  geom_point(color = "firebrick", size = 2) +
  labs(x = "Season", y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-3, 3)) +
  theme_bw() +
  theme(panel.grid = element_blank())

CHECK PRIOR PREDICTIVE DISTRIBUTION

1. Prior Predictive Distribution

Can simulate data just on the priors. Fit model but only consider prior when fitting model. If this looks reasonable, it helps to confirm that your priors were reasonable

# Seed R's RNG before fitting; intended to make the run repeatable.
set.seed(666)
# Same formula, family and priors as the final model, but sample_prior = "only"
# makes brms draw from the priors alone (the likelihood is ignored), giving a
# prior predictive distribution. No iter/warmup/control overrides are passed,
# so the brms sampling defaults apply.
model_priors_only <- brm(slgCort ~ season + (1 | visit),
                   family = skew_normal(),
                   prior = priors,
                   data = df,
                   sample_prior = "only")
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 6.1e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.61 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.019 seconds (Warm-up)
## Chain 1:                0.015 seconds (Sampling)
## Chain 1:                0.034 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 3e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.03 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.02 seconds (Warm-up)
## Chain 2:                0.021 seconds (Sampling)
## Chain 2:                0.041 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 5e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.05 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.017 seconds (Warm-up)
## Chain 3:                0.015 seconds (Sampling)
## Chain 3:                0.032 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 4e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.022 seconds (Warm-up)
## Chain 4:                0.016 seconds (Sampling)
## Chain 4:                0.038 seconds (Total)
## Chain 4:

2. Check predictions against priors

pp_check(model_priors_only, ndraws = 100)

VARIANCE-COVARIANCE MATRIX

# Covariance matrix of the posterior draws for the parameters from b_Intercept
# through sigma (season coefficients, visit-level SD and sigma), rounded to
# 3 decimals. select() strips the draws metadata columns, which triggers the
# "Dropping 'draws_df' class" warning.
as_draws_df(model) %>%
  select(b_Intercept:sigma) %>%
  cov() %>%
  round(digits = 3)
## Warning: Dropping 'draws_df' class as required metadata was removed.
##                     b_Intercept b_seasonsummer b_seasonautumn b_seasonwinter
## b_Intercept               0.097         -0.041         -0.043         -0.043
## b_seasonsummer           -0.041          0.065          0.039          0.039
## b_seasonautumn           -0.043          0.039          0.073          0.040
## b_seasonwinter           -0.043          0.039          0.040          0.078
## sd_visit__Intercept      -0.004          0.002          0.003          0.005
## sigma                     0.005          0.000          0.000          0.000
##                     sd_visit__Intercept sigma
## b_Intercept                      -0.004 0.005
## b_seasonsummer                    0.002 0.000
## b_seasonautumn                    0.003 0.000
## b_seasonwinter                    0.005 0.000
## sd_visit__Intercept               0.129 0.001
## sigma                             0.001 0.008

MANUAL POSTERIOR PREDICTIVE DISTRIBUTION CHECKS

NB Uses posterior_predict

1. Posterior predictive distribution plots for season

# Simulate outcomes with posterior_predict(); each row of ppd is one posterior
# draw and holds one simulated value per row of df.
ppd <- posterior_predict(model)

# Pick three distinct posterior draws to show next to the observed data.
ppd_rows <- sample.int(nrow(ppd), 3)

# 2 x 2 panel: observed data first, then one simulated data set per draw.
# The previous graphics settings are restored afterwards so the layout does
# not leak into later plots.
old_par <- par(mfrow = c(2, 2))
stripchart(slgCort ~ season, vertical = TRUE, method = "jitter",
           col = "steelblue3", data = df, pch = 20, main = "Observed")
for (ppd_row in ppd_rows) {
  stripchart(ppd[ppd_row, ] ~ season, vertical = TRUE, method = "jitter",
             col = "firebrick3", data = df, pch = 20, main = "PPD")
}
par(old_par)

ANALYSING THE POSTERIOR DISTRIBUTION

1a. Basic plot of conditional effects from model

# Default conditional-effects plots for the model; ask = FALSE draws them all
# without pausing for user input between plots.
conditional_effects(model) %>%
  plot(ask = FALSE)

1b. advanced plot of conditional effect of season

# Conditional effect of season. The plotting columns are selected by name
# rather than by position (previously c(1, 6:9)), so the subset no longer
# depends on how many columns precede the estimates.
ce <- conditional_effects(model, effects = "season")
ce_df <- ce[[1]][c("season", "estimate__", "se__", "lower__", "upper__")]

# One colour per season, in the row order of ce_df; shared by the error bars
# and the points so the two layers cannot drift apart.
season_colours <- c("#F8766D", "#00BFC4", "#7CAE00", "#C77CFF")

ggplot(ce_df, aes(x = season, y = estimate__, group = 1)) +
  geom_errorbar(aes(ymin = lower__, ymax = upper__), width = 0.1,
                colour = season_colours, linewidth = 1) +
  geom_point(shape = 21, size = 6, fill = season_colours) +
  theme_bw() +
  labs(title = "Conditional effect of season on hair cortisol",
       x = "Season",
       y = "Log Hair Cortisol (standardised)") +
  theme(axis.title.y = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 12, face = "bold"),
        title = element_text(size = 12, face = "bold"),
        plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(color = "grey25", size = 12),
        axis.text.y = element_text(color = "grey50", size = 10))

2. mcmc_plot of model

a. all variables versus priors

# mcmc_plot() with its default settings, applied to the whole model.
mcmc_plot(object = model)

b. just season versus prior

i. distributional

# Default mcmc_plot() restricted to the three season coefficients and the
# sampled prior for the regression coefficients (prior_b).
mcmc_plot(
  object = model,
  variable = c("b_seasonsummer", "b_seasonautumn", "b_seasonwinter", "prior_b")
)

ii. density
# Area (density) plot of the season coefficients and the sampled prior.
# The y axis is relabelled with readable names and ordered prior first.
mcmc_plot(
  object = model,
  variable = c("b_seasonsummer", "b_seasonautumn", "b_seasonwinter", "prior_b"),
  type = "areas"
) +
  theme_classic() +
  labs(
    title = "Prior vs posterior distribution for season effect",
    x = "Possible parameter values",
    y = ""
  ) +
  scale_y_discrete(
    labels = c("prior_b" = "Prior",
               "b_seasonsummer" = "Summer posterior",
               "b_seasonautumn" = "Autumn posterior",
               "b_seasonwinter" = "Winter posterior"),
    limits = c("prior_b", "b_seasonsummer", "b_seasonautumn", "b_seasonwinter")
  ) +
  theme(
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    title = element_text(size = 12, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(color = "grey50", size = 12),
    axis.text.y = element_text(color = "grey8", size = 12)
  )
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

c. just parameters of beta variables

# Default mcmc_plot() for the three season coefficients only.
mcmc_plot(
  object = model,
  variable = c("b_seasonsummer", "b_seasonautumn", "b_seasonwinter")
)

3. Plot all posterior distributions

# Posterior draws as a matrix (one column per parameter).
posterior <- as.matrix(model)

# Density areas for the intercept, sigma and the season coefficients.
# prob = 0.75 is an arbitrary threshold for the shaded probability mass.
mcmc_areas(
  posterior,
  pars = c("b_Intercept", "sigma",
           "b_seasonsummer", "b_seasonautumn", "b_seasonwinter"),
  prob = 0.75
)

4. Plot posterior distributions for season

# Posterior draws as a matrix (one column per parameter).
posterior <- as.matrix(model)

# Density areas for the season coefficients only, with readable y-axis labels.
# prob = 0.97 is an arbitrary threshold for the shaded probability mass.
mcmc_areas(
  posterior,
  pars = c("b_seasonsummer", "b_seasonautumn", "b_seasonwinter"),
  prob = 0.97
) +
  theme_classic() +
  labs(
    title = "Posterior distribution for season effect",
    y = "Density distribution",
    x = "Possible parameter values"
  ) +
  scale_y_discrete(
    labels = c("b_seasonsummer" = "Summer",
               "b_seasonautumn" = "Autumn",
               "b_seasonwinter" = "Winter")
  ) +
  theme(
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    title = element_text(size = 12, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(color = "grey50", size = 12),
    axis.text.y = element_text(color = "grey8", size = 12)
  )
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

5. Describe the posterior visually

# Describe the posterior: highest density intervals of the model parameters at
# several credible levels, plotted with the intercept included.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
hdi_range <- hdi(model, ci = c(0.65, 0.70, 0.80, 0.89, 0.95))
plot(hdi_range, show_intercept = TRUE)

just season

# Describe the posterior of the season coefficients only: highest density
# intervals at several credible levels, plotted with custom labels.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
hdi_range <- hdi(model, ci = c(0.65, 0.70, 0.80, 0.89, 0.95),
                 parameters = c("b_seasonsummer", "b_seasonautumn",
                                "b_seasonwinter"))
plot(hdi_range, show_intercept = TRUE) +
  labs(title = "Posterior distribution for season effect",
       y = "Density distribution",
       x = "Possible parameter values") +
  theme(axis.title.y = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 12, face = "bold"),
        title = element_text(size = 12, face = "bold"),
        plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(color = "grey50", size = 12),
        axis.text.y = element_text(color = "grey8", size = 12))

HYPOTHESIS TESTS

1. Hypothesis tests to check whether the mean difference between spring and each of the other seasons is above or below zero

a. spring vs. summer (from draws) is >0

# Posterior probability that the summer coefficient (summer relative to
# spring) is above zero. The column is selected by name instead of by
# position so the result does not depend on the column order of the draws.
draws <- as.matrix(model)
mean(draws[, "b_seasonsummer"] > 0)
## [1] 0.2339583

Check the 97% credible interval (HPDI) for spring vs summer from the draws

# 97% highest posterior density interval of the summer coefficient, selected
# by column name rather than by position.
HPDI(draws[, "b_seasonsummer"], prob = 0.97)
##      |0.97      0.97| 
## -0.7345002  0.3749654
# Posterior probability that the summer coefficient is above / below zero
mean(draws[, "b_seasonsummer"] > 0)
## [1] 0.2339583
mean(draws[, "b_seasonsummer"] < 0)
## [1] 0.7660417

b. spring vs. autumn (from draws) is >0

# Posterior probability that the autumn coefficient (autumn relative to
# spring) is above / below zero. The column is selected by name instead of by
# position so the result does not depend on the column order of the draws.
draws <- as.matrix(model)
mean(draws[, "b_seasonautumn"] > 0)
## [1] 0.117625
mean(draws[, "b_seasonautumn"] < 0)
## [1] 0.882375

Check the 97% credible interval (HPDI) for spring vs autumn from the draws

# 97% highest posterior density interval of the autumn coefficient, selected
# by column name rather than by position.
HPDI(draws[, "b_seasonautumn"], prob = 0.97)
##      |0.97      0.97| 
## -0.8887980  0.2880951

c. spring vs. winter (from draws) is >0 or <0

# Posterior probability that the winter coefficient (winter relative to
# spring) is above / below zero. The column is selected by name instead of by
# position so the result does not depend on the column order of the draws.
draws <- as.matrix(model)
mean(draws[, "b_seasonwinter"] > 0)
## [1] 0.8286667
mean(draws[, "b_seasonwinter"] < 0)
## [1] 0.1713333

Check the 97% credible interval (HPDI) for spring vs winter from the draws

# 97% highest posterior density interval of the winter coefficient, selected
# by column name rather than by position.
HPDI(draws[, "b_seasonwinter"], prob = 0.97)
##      |0.97      0.97| 
## -0.3419357  0.8846329

3. Visualising the posterior of a model using numerical and graphical methods

a. basic (one dog only)

# Stack four copies of the first row of df (the first dog) into one data frame
new_data <- do.call(rbind, replicate(4, df[1, ], simplify = FALSE))
# Give each copy a different season so only season varies between the rows
new_data[["season"]] <- c("autumn", "winter", "spring", "summer")
# Print the result to confirm the four rows differ only in season
print(new_data)
##   number   group visit season breed_group coat_colour  sex age comorbidity
## 1     c1 stopped    v0 autumn         ret        dark Male  43         yes
## 2     c1 stopped    v0 winter         ret        dark Male  43         yes
## 3     c1 stopped    v0 spring         ret        dark Male  43         yes
## 4     c1 stopped    v0 summer         ret        dark Male  43         yes
##   fat_percent cortisol   lgCort breed   slgCort
## 1    52.21393  4.92422 1.594166   ret 0.3415375
## 2    52.21393  4.92422 1.594166   ret 0.3415375
## 3    52.21393  4.92422 1.594166   ret 0.3415375
## 4    52.21393  4.92422 1.594166   ret 0.3415375
# Draw predictions for the four rows of new_data with posterior_predict();
# the result has one column per row of new_data (autumn, winter, spring,
# summer, in that order).
# NOTE(review): posterior_predict() returns predictive draws rather than
# expected values, despite the name pred_means - confirm this is intended.
pred_means <- posterior_predict(object = model, newdata = new_data)


# Difference in predictions, autumn minus winter: new_data lists the seasons
# as autumn, winter, spring, summer, so these are columns 1 and 2.
differenceWinter <- pred_means[, 1L] - pred_means[, 2L]

# 2 x 2 plotting grid for the histograms
par(mfrow = c(2, 2))

# Mean of the difference
differenceWinter %>% mean()
## [1] -0.5704704
# Histogram of the difference
hist(differenceWinter)
# 97% highest posterior density interval of the difference
HPDI(differenceWinter, prob = 0.97)
##     |0.97     0.97| 
## -3.769518  2.672875
# NOTE: the rendered `##` output shown for this comparison predates the fix
# below (it repeats the autumn-vs-winter numbers); re-knit to refresh it.

# Difference in predictions, autumn minus spring. new_data lists the seasons
# as autumn, winter, spring, summer, so spring is column 3 of pred_means
# (column 2, used previously, is winter).
differenceSpring <- pred_means[, 1] - pred_means[, 3]

# The 2 x 2 layout set for the first comparison is still active; calling
# par(mfrow = ...) again would start a new page, so it is not repeated here.

# Examine mean of difference
mean(differenceSpring)
## [1] -0.5704704
# View histogram of this
hist(differenceSpring)
# Create HPDI
HPDI(differenceSpring, 0.97)
##     |0.97     0.97| 
## -3.769518  2.672875
# NOTE: the rendered `##` output shown for this comparison predates the fix
# below (it repeats the autumn-vs-winter numbers); re-knit to refresh it.

# Difference in predictions, autumn minus summer. new_data lists the seasons
# as autumn, winter, spring, summer, so summer is column 4 of pred_means
# (column 2, used previously, is winter).
differenceSummer <- pred_means[, 1] - pred_means[, 4]

# The 2 x 2 layout set for the first comparison is still active; calling
# par(mfrow = ...) again would start a new page, so it is not repeated here.

# Examine mean of difference
mean(differenceSummer)
## [1] -0.5704704
# View histogram of this
hist(differenceSummer)
# Create HPDI
HPDI(differenceSummer, 0.97)
##     |0.97     0.97| 
## -3.769518  2.672875

b. Advanced… using all dogs in the model

i. spring vs summer

# NOTE: the rendered `##` output shown for this comparison was produced before
# the fix below and must be regenerated by re-knitting.

# Copy of the data in which every row is set to spring
new_data1 <- df
new_data1$season <- "spring"

# Copy of the data in which every row is set to summer.
# (Previously this assignment overwrote new_data1, so the comparison was
# "all summer" versus the observed seasons rather than spring versus summer.)
new_data2 <- df
new_data2$season <- "summer"

# Predictive draws for each scenario (one column per row of df)
pred_nd1 <- posterior_predict(model, newdata = new_data1)
pred_nd2 <- posterior_predict(model, newdata = new_data2)
# Draw-by-draw difference, spring minus summer
pred_diff <- pred_nd1 - pred_nd2
pred_diff <- data.frame(pred_diff)

# Mean difference for each column (dog) of the data frame; colMeans() avoids
# the matrix coercion that apply() performs on a data frame.
pred_diff_summer <- colMeans(pred_diff)
# View histogram of mean differences
hist(pred_diff_summer)

# Examine mean of difference
mean(pred_diff_summer)
## [1] -0.09230316
# 97% HPDI of the per-column mean differences
# NOTE(review): this interval describes the spread across columns (dogs), not
# posterior uncertainty; summarise over rows (draws) instead if a credible
# interval for the average difference is wanted - confirm intent.
HPDI(pred_diff_summer, 0.97)
##      |0.97      0.97| 
## -0.4605680  0.1528126

ii. spring vs autumn

# NOTE: the rendered `##` output shown for this comparison was produced before
# the fix below and must be regenerated by re-knitting.

# Copy of the data in which every row is set to spring
new_data1 <- df
new_data1$season <- "spring"

# Copy of the data in which every row is set to autumn.
# (Previously this assignment overwrote new_data1, so the comparison was
# "all autumn" versus the observed seasons rather than spring versus autumn.)
new_data2 <- df
new_data2$season <- "autumn"

# Predictive draws for each scenario (one column per row of df)
pred_nd1 <- posterior_predict(model, newdata = new_data1)
pred_nd2 <- posterior_predict(model, newdata = new_data2)
# Draw-by-draw difference, spring minus autumn
pred_diff <- pred_nd1 - pred_nd2
pred_diff <- data.frame(pred_diff)

# Mean difference for each column (dog) of the data frame; colMeans() avoids
# the matrix coercion that apply() performs on a data frame.
pred_diff_autumn <- colMeans(pred_diff)
# View histogram of mean differences
hist(pred_diff_autumn)

# Examine mean of difference
mean(pred_diff_autumn)
## [1] -0.2325993
# 97% HPDI of the per-column mean differences
# NOTE(review): this interval describes the spread across columns (dogs), not
# posterior uncertainty; summarise over rows (draws) instead if a credible
# interval for the average difference is wanted - confirm intent.
HPDI(pred_diff_autumn, 0.97)
##       |0.97       0.97| 
## -0.59590142  0.01702301

iii. spring vs winter

# NOTE: the rendered `##` output shown for this comparison was produced before
# the fix below and must be regenerated by re-knitting.

# Copy of the data in which every row is set to spring
new_data1 <- df
new_data1$season <- "spring"

# Copy of the data in which every row is set to winter.
# (Previously this assignment overwrote new_data1, so the comparison was
# "all winter" versus the observed seasons rather than spring versus winter.)
new_data2 <- df
new_data2$season <- "winter"

# Predictive draws for each scenario (one column per row of df)
pred_nd1 <- posterior_predict(model, newdata = new_data1)
pred_nd2 <- posterior_predict(model, newdata = new_data2)
# Draw-by-draw difference, spring minus winter
pred_diff <- pred_nd1 - pred_nd2
pred_diff <- data.frame(pred_diff)

# Mean difference for each column (dog) of the data frame; colMeans() avoids
# the matrix coercion that apply() performs on a data frame.
pred_diff_winter <- colMeans(pred_diff)
# View histogram of mean differences
hist(pred_diff_winter)

# Examine mean of difference
mean(pred_diff_winter)
## [1] 0.3527753
# 97% HPDI of the per-column mean differences
# NOTE(review): this interval describes the spread across columns (dogs), not
# posterior uncertainty; summarise over rows (draws) instead if a credible
# interval for the average difference is wanted - confirm intent.
HPDI(pred_diff_winter, 0.97)
##      |0.97      0.97| 
## -0.0113434  0.6059657

Make predictions of log cortisol for each dog and compare with actual data

# posterior_epred() draws for the fitted data (one column per row of df)
pred_slgCort <- posterior_epred(object = model)
# Average over the draws to get one predicted value per observation
av_pred_slgCort <- pred_slgCort %>% colMeans()
# Scatter plot of the averaged predictions against the observed slgCort
plot(av_pred_slgCort ~ df$slgCort)

Check if the fit is better if you allow sigma to vary across season

1. Set priors

# Priors for model 2, one object per parameter class:
# intercept, regression coefficients, group-level SD and skewness (alpha).
prior_int   <- set_prior(prior = "normal(0, 0.5)", class = "Intercept")
prior_b     <- set_prior(prior = "normal(0, 1)", class = "b")
prior_sd    <- set_prior(prior = "normal(0, 1)", class = "sd")
prior_alpha <- set_prior(prior = "normal(4, 2)", class = "alpha")

# Bundle the individual priors into the single object passed to brm()
priors2 <- c(prior_int, prior_b, prior_sd, prior_alpha)

2. Run model 2

Increased adapt_delta above 0.8 (0.99 here), as there were divergent transitions

# Model 2: skew-normal model of slgCort on season with a varying intercept per
# visit, plus a second formula that lets sigma depend on season.
# adapt_delta is raised to 0.99, all parameters are saved and the priors are
# sampled alongside the posterior.
set.seed(666)
model2 <- brm(
  formula = bf(slgCort ~ season + (1 | visit), sigma ~ season),
  data = df,
  family = skew_normal(),
  prior = priors2,
  control = list(adapt_delta = 0.99),
  save_pars = save_pars(all = TRUE),
  sample_prior = TRUE
)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.000113 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.13 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 1.092 seconds (Warm-up)
## Chain 1:                0.792 seconds (Sampling)
## Chain 1:                1.884 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 1.9e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.19 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 1.068 seconds (Warm-up)
## Chain 2:                0.775 seconds (Sampling)
## Chain 2:                1.843 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 2.4e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.24 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 1.336 seconds (Warm-up)
## Chain 3:                0.912 seconds (Sampling)
## Chain 3:                2.248 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 1.7e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 1.112 seconds (Warm-up)
## Chain 4:                0.863 seconds (Sampling)
## Chain 4:                1.975 seconds (Total)
## Chain 4:

3. get summary of model

# Print the summary of model 2
summary(object = model2)
##  Family: skew_normal 
##   Links: mu = identity; sigma = log; alpha = identity 
## Formula: slgCort ~ season + (1 | visit) 
##          sigma ~ season
##    Data: df (Number of observations: 73) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Multilevel Hyperparameters:
## ~visit (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.37      0.37     0.01     1.37 1.00     1583     2075
## 
## Regression Coefficients:
##                    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept              0.12      0.33    -0.52     0.78 1.00     1734     2294
## sigma_Intercept        0.03      0.20    -0.32     0.43 1.00     1893     2392
## seasonsummer          -0.28      0.31    -0.91     0.30 1.00     1842     2061
## seasonautumn          -0.23      0.35    -0.92     0.45 1.00     1927     2312
## seasonwinter           0.22      0.33    -0.45     0.85 1.00     2094     2467
## sigma_seasonsummer    -0.14      0.26    -0.64     0.36 1.00     1848     2397
## sigma_seasonautumn     0.10      0.26    -0.41     0.59 1.00     1899     2355
## sigma_seasonwinter    -0.06      0.28    -0.61     0.46 1.00     2022     2576
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## alpha     4.84      1.43     2.29     7.88 1.00     3386     2887
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

4. Try the PSIS LOO-CV procedure to check model performance

# PSIS-LOO cross-validation for model 2 with moment matching enabled,
# then print the result.
loo_model2 <- loo(x = model2, moment_match = TRUE)
print(loo_model2)
## 
## Computed from 4000 by 73 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo   -102.9  6.1
## p_loo         7.6  1.3
## looic       205.9 12.1
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.4, 1.2]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.

5. Compare looic for models 1 and 2

# Store the LOO criterion on both fits and compare them.
# model2 is computed with moment matching, as the pareto_k warning recommends
# and as in the earlier loo(model2, moment_match = TRUE) call, so the
# comparison no longer relies on an observation with pareto_k > 0.7.
# NOTE: the loo_compare() output rendered below predates this change; re-knit
# to refresh it.
# NOTE(review): `model` is left without moment matching because its save_pars
# setting is not visible here - confirm whether it should match model2.
model <- add_criterion(model, "loo")
model2 <- add_criterion(model2, "loo", moment_match = TRUE)
loo_compare(model, model2)
##        elpd_diff se_diff
## model   0.0       0.0   
## model2 -2.3       1.0